if (!require("pacman")) install.packages("pacman")
pacman::p_load("knitr", "dplyr", "scales", "car", "doParallel", "ggpubr",
"mediation","tibble","stringr","ggplot2","psych","lavaan",
"semTools","ggrepel","vegan","phyloseq","FactoMineR","factoextra",
"gplots","corrplot","ggvegan",
install = TRUE)
library("MicrobiomeAnalystR")
## Change mediation function to print out up to 3 decimals for mediation
trace(mediation:::print.summary.mediate,
at = 11,
tracer = quote({
printCoefmat <- function(x, digits) {
p <- x[, 4] #p-values seem to be stored rounded
x[, 1:3] <- sprintf("%.5f", x[, 1:3])
x[, 4] <- sprintf("%.3f", p)
x[, 4] <- paste(p,gtools::stars.pval(p), sep = " ")
print(x, quote = FALSE, left = TRUE)
}
}),
print = FALSE)
## [1] "print.summary.mediate"
## Flatten the correlation matrix from upper or lower triangle into data.frame
flattenCorrMatrix <- function(cormat, pmat) {
ut <- upper.tri(cormat)
data.frame(
row = rownames(cormat)[row(cormat)[ut]],
column = rownames(cormat)[col(cormat)[ut]],
coeff =(cormat)[ut],
p_value = pmat[ut]
)
}
## Seed
set.seed(123)
abundance.in <- read.delim("tsi_abundance.txt", check.names = F, sep = "\t")
metadata <- read.delim("tsi_metadata.txt", check.names = F)
abundance.spp <- read.delim("yarrabah_metaphlan_counts_species.tsv", check.names = F, row.names = 1, header = T)# Metaphlan only species-level
We used the MicrobiomeAnalystR package to explore and visualize the taxonomic the data by Island This was a quick way to identify any patterns in the data before embarking on time-consuming analyses
mbSet<-Init.mbSetObj()
## Warning in Cairo::CairoFonts("Arial:style=Regular", "Arial:style=Bold", :
## CairoFonts() has no effect on Windows. Please use par(family="...") to specify
## the desired font - see ?par.
## [1] "Init MicrobiomeAnalyst!"
mbSet<-SetModuleType(mbSet, "mdp")
mbSet<-ReadSampleTable(mbSet, "metadata_4_beta_diversity.txt");
## Warning in FUN(newX[, i], ...): NAs introduced by coercion
mbSet<-Read16SAbundData(mbSet, "metaphlan_abundance_all.txt","text","Others/Not_specific","T");
mbSet<-SanityCheckData(mbSet, "text");
## [1] "Sanity check passed!"
mbSet<-PlotLibSizeView(mbSet, "norm_species_count","png");
mbSet<-CreatePhyloseqObj(mbSet, "text","Others/Not_specific","T")
mbSet<-ApplyAbundanceFilter(mbSet, "prevalence", 0, 0.1);
mbSet<-ApplyVarianceFilter(mbSet, "iqr", 0.0);
mbSet<-PerformNormalization(mbSet, "none", "none", "none");
## Loading required package: memoise
mbSet<-PlotAlphaData(mbSet, "orig","alpha_diver_Chao","Chao1","location","OTU", "default", "png");
mbSet<-PlotAlphaBoxData(mbSet, "alpha_diverbox_Chao","Chao1","location","default", "png");
## Warning: `fun.y` is deprecated. Use `fun` instead.
## Warning: Use of `data$value` is discouraged. Use `value` instead.
## Warning: Use of `data$value` is discouraged. Use `value` instead.
## Warning: Use of `data$value` is discouraged. Use `value` instead.
## Warning: Use of `data$value` is discouraged. Use `value` instead.
mbSet<-PerformAlphaDiversityComp(mbSet, "tt","location");
## [1] "p-value: 0.026794; [T-test] statistic: -2.2504"
mbSet<-PlotAlphaData(mbSet, "orig","alpha_diver_Shannon","Shannon","location","OTU", "default", "png");
mbSet<-PlotAlphaBoxData(mbSet, "alpha_diverbox_Shannon","Shannon","location","default", "png");
## Warning: `fun.y` is deprecated. Use `fun` instead.
## Warning: Use of `data$value` is discouraged. Use `value` instead.
## Warning: Use of `data$value` is discouraged. Use `value` instead.
## Warning: Use of `data$value` is discouraged. Use `value` instead.
## Warning: Use of `data$value` is discouraged. Use `value` instead.
mbSet<-PerformAlphaDiversityComp(mbSet, "tt","location");
## [1] "p-value: 0.035256; [T-test] statistic: -2.1355"
mbSet<-PlotAlphaData(mbSet, "orig","alpha_diver_Shannon_spp","Shannon","location","Species", "default", "png");
mbSet<-PlotAlphaBoxData(mbSet, "alpha_diverbox_Shannon_spp","Shannon","location","default", "png");
## Warning: `fun.y` is deprecated. Use `fun` instead.
## Warning: Use of `data$value` is discouraged. Use `value` instead.
## Warning: Use of `data$value` is discouraged. Use `value` instead.
## Warning: Use of `data$value` is discouraged. Use `value` instead.
## Warning: Use of `data$value` is discouraged. Use `value` instead.
mbSet<-PerformAlphaDiversityComp(mbSet, "tt","location");
## [1] "p-value: 0.017406; [T-test] statistic: -2.4193"
mbSet<-PlotAlphaData(mbSet, "orig","alpha_diver_Chao_spp","Chao1","location","Species", "default", "png");
mbSet<-PlotAlphaBoxData(mbSet, "alpha_diverbox_Chao_spp","Chao1","location","default", "png");
## Warning: `fun.y` is deprecated. Use `fun` instead.
## Warning: Use of `data$value` is discouraged. Use `value` instead.
## Warning: Use of `data$value` is discouraged. Use `value` instead.
## Warning: Use of `data$value` is discouraged. Use `value` instead.
## Warning: Use of `data$value` is discouraged. Use `value` instead.
mbSet<-PerformAlphaDiversityComp(mbSet, "tt","location");
## [1] "p-value: 0.0024939; [T-test] statistic: -3.1043"
mbSet<-PlotBetaDiversity(mbSet, "beta_diver_PCOA_spp","PCoA","bray","expfac","location","none","Species","Not_Assigned","Observed", "yes", "png", 72, "default");
mbSet<-PlotBetaDiversity(mbSet, "beta_diver_NMDS_spp","NMDS","bray","expfac","location","none","Species","Not_Assigned","Observed", "yes", "png", 72, "default");
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.2223184
## Run 1 stress 0.2220119
## ... New best solution
## ... Procrustes: rmse 0.04591519 max resid 0.3680259
## Run 2 stress 0.2214421
## ... New best solution
## ... Procrustes: rmse 0.02275082 max resid 0.1939031
## Run 3 stress 0.2353974
## Run 4 stress 0.2255925
## Run 5 stress 0.2253363
## Run 6 stress 0.2308619
## Run 7 stress 0.2227583
## Run 8 stress 0.2415948
## Run 9 stress 0.2217357
## ... Procrustes: rmse 0.02934804 max resid 0.1929993
## Run 10 stress 0.2637086
## Run 11 stress 0.2394155
## Run 12 stress 0.2329377
## Run 13 stress 0.2246119
## Run 14 stress 0.2323597
## Run 15 stress 0.2324664
## Run 16 stress 0.2278253
## Run 17 stress 0.2367187
## Run 18 stress 0.2304623
## Run 19 stress 0.2378818
## Run 20 stress 0.2385042
## *** No convergence -- monoMDS stopping criteria:
## 1: no. of iterations >= maxit
## 19: stress ratio > sratmax
Raw NMDS Fig on first attempt in MicrobiomeAnalysitR
abundance <- column_to_rownames(abundance.in, var = "ID")
abundance <- abundance[!(apply(abundance, 1, function(y) all(y == 0.000))),] %>%
round(., 4) %>% sweep(., 1, rowSums(abundance), '/')
spp.m <- data.matrix(abundance)
## Shannon diversity must define that its the diversity function from vegan to be used
shanno_vegan <- as.data.frame(vegan::diversity(spp.m, index = "shannon", MARGIN = 1, base = exp(1)))%>%
rownames_to_column(var = "Species")
## Shannon evenness
x <- as.data.frame(t(spp.m))
x$taxonomy <- rownames(x)
## library(RAM)
shannon_evenness <- data.frame(RAM::evenness(data=list(x=x), index="shannon"), check.names=FALSE)
## Registered S3 method overwritten by 'labdsv':
## method from
## summary.dist ade4
shannon_evenness <- as.data.frame(t(shannon_evenness)) %>%
rownames_to_column(var = "Species")
# colnames(shannon_evenness) <- gsub("^X", "", colnames(shannon_evenness))
## Chao1 species richness
chao1 <- data.frame(estimateR(abundance.spp))
chao1 <- as.data.frame(t(chao1)) %>%
rownames_to_column(var = "Species")
chao1 <- chao1[1:2]
## Simpson species diversity
simpson_vegan <- as.data.frame(vegan::diversity(spp.m, index = "simpson", MARGIN = 1, base = exp(1)))%>%
rownames_to_column(var = "Species")
## Pielou's evenness
pielou <- data.frame(vegan::diversity(spp.m, index = "shannon", MARGIN = 1, base = exp(1))/log(specnumber(spp.m)))%>%
rownames_to_column(var = "Species")
## Bray-Curtis dissimilarity index
bray <- vegan::vegdist(t(spp.m), method = "bray", na.rm = T)
m <- data.frame(t(combn(colnames(spp.m),2)), as.numeric(bray))
names(m) <- c("c1", "c2", "bray_distance")
write.table(m, "bray_curtisMatrix.tsv", sep = "\t", row.names = F)
## Combine Alpha Measure
c <- plyr::join_all(list(pielou, shanno_vegan, shannon_evenness, simpson_vegan, chao1), by = "Species", type='left')
colnames(c) <- c("pielou_evenness", "shannon_diversity", "shannon_evenness", "simpson_diversity", "chao_richness")
# kable(head(c, 10))
write.table(c, "diversity_evenness.tsv", sep = "\t", col.names = T)
md <- subset(metadata, select = c("ID", "Site")) %>% `rownames<-` (NULL) %>% tibble::column_to_rownames(.,var = "ID")
permanova <-adonis2((abundance.in %>%
tibble::column_to_rownames(.,var = "ID") %>%
t()) ~ Site,
data = md,
permutations = 10000,
method = "bray")
permanova
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 10000
##
## adonis2(formula = (abundance.in %>% tibble::column_to_rownames(., var = "ID") %>% t()) ~ Site, data = md, permutations = 10000, method = "bray")
## Df SumOfSqs R2 F Pr(>F)
## Site 1 0.7521 0.02863 2.8886 9.999e-05 ***
## Residual 98 25.5148 0.97137
## Total 99 26.2669 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Of course, there is a considerable difference in multivariate spread that could be the cause for the significant PERMANOVA result.
We took the data from the above analysis and made some cute plots in R
df_all<- read.csv("NMDS_primer.csv", row.names = 1) ## in dropbox TSI_analysis from primer7
centroids_all <- aggregate(cbind(NMDS1,NMDS2)~Site,df_all,mean)
##Link all dots to centroid
gg_all <- merge(df_all,aggregate(cbind(mean.X=NMDS1,mean.Y=NMDS2)~Site,df_all,mean),by="Site")
##PLOTS
#Test - With dots and lines
ggplot(data=gg_all, mapping=aes(NMDS1,NMDS2,color=factor(Site)))+
geom_point(size=1.5)+
stat_ellipse(aes(NMDS1,NMDS2,color=factor(Site)),type = "norm")#+
geom_segment(aes(mean.X, mean.Y, xend=NMDS1, yend=NMDS2))
## mapping: x = ~mean.X, y = ~mean.Y, xend = ~NMDS1, yend = ~NMDS2
## geom_segment: arrow = NULL, arrow.fill = NULL, lineend = butt, linejoin = round, na.rm = FALSE
## stat_identity: na.rm = FALSE
## position_identity
#With dots, lines, removed background, axis lines (http://docs.ggplot2.org/dev/vignettes/themes.html)
cols_DF <- c('#00007fff','#94631eff')
(all<- ggplot(data=gg_all, mapping=aes(NMDS1,NMDS2,color=factor(Site), fill = factor(Site))) +
theme_linedraw()+
stat_ellipse(aes(NMDS1,NMDS2, color=factor(Site)), geom = "polygon", alpha = 1/8, type = "norm", linetype = 2, size = 0.5)+
geom_vline(xintercept = 0, colour="darkgrey", linetype = 2) +
geom_hline(yintercept = 0, colour="darkgrey", linetype = 2) +
theme(axis.title = element_text(size=16),
legend.title = element_text(size = 16),
axis.text = element_blank(),
axis.ticks = element_blank()) +
geom_point(size=3) +
scale_color_manual(name = "Site",values=c('#00007fff','#94631eff')) +
scale_fill_manual(name = "Site", values=c('#00007fff','#94631eff')) +
theme_bw())
ggsave(all, filename = "NMDS.svg", height = 8, width = 10)
After some parameter fine-tuning (see manuscript M&M for exact final parameters) in R and beautification in GraphPad prism7 or Adobe Illustrator, the a) \(\alpha\) diversity on unfiltered species data and b) \(\beta\) diversity on filtered data looked like this:
Original Figure 1 NMDS
Original Figure 1 PCoA
P-Values in \(\alpha\)-diversity are ANOVA derived (See ANOVA below)
These data was also augmeneted with some Spearman correlations between metadata and species relative abundances
The species abundance and metadata were used to perform CCA for each island
abundance <- abundance.spp[!(apply(abundance.spp, 1, function(y) all(y == 0.000))),]
abundance <- round(abundance, 4)
abundance <- abundance[rowSums(abundance == 0) <= 70, ]
abundance <- sweep(abundance, 1, rowSums(abundance), '/')
meta <- meta[,c(1,2,6,12,13,15:19)]
names <- paste0(substr(rownames(abundance), 0, 1), ". ",stringr::word(rownames(abundance),2,-1,sep = "\\_"))
x_nms <- rownames(abundance)
for (i in 1:length(names)){
print(paste0(i," ", names[i]))
if (grepl("unclass",names[i])){
names[[i]] <- paste0((stringr::word(x_nms[[i]],1,sep = "\\_")),"*")
}
}
rownames(abundance) <- names
tsi_ccpna <- cca(X = t(abundance),Y = meta)
permutest(tsi_ccpna, permutations = 9999,model = "reduced", alpha =0.05)
anova(tsi_ccpna, perm.max=9999, alpha =0.05, beta = 0.01)
plot.new()
# svg("CCA_combined.svg", height = 7, width = 14)
par(mfrow=c(1,2))
##sites
plot(tsi_ccpna,choices=c(1,2),display=c("wa","bp"),type="points",scaling="species", col = "black",
xlim=c(-4,4),ylim=c(-4,4),
main = "", cex.lab=1.6,cex.axis=1.5, cex.sub=1.8, pch=19, cex = 2)
points(tsi_ccpna,disp="sites",pch=20, col= rev(colvec[as.factor(x$study_site)]),cex=1.3, scaling="species")
legend("topleft",title="Island", legend=levels(as.factor(x$study_site)), bty="n",col=colvec,pch=21,pt.bg=colvec,cex = 1.8)
##species
plot(tsi_ccpna,choices=c(1,2),display= "species", type="points",scaling="none",
xlim=c(-4,4),ylim=c(-4,4), col="grey",
main = "", cex.lab=1.6,cex.axis=1.5, cex.sub=1.8, pch=19, ylab = "")
points(tsi_ccpna,disp="species",pch=20, col="grey2",bg = "grey2", cex=1.3, scaling="sites")
text(tsi_ccpna,"species",pos=2,axis.bp=TRUE, cex=1, scaling="none", col="black", font = 1.5)
dev.off()
After some manual cleaning!
CCA PLOT
We sought to identify these patterns using Distance-based redundancy analysis in PRIMER7